# -*- coding: utf-8 -*-
# @Time : 2021/12/3 11:28
# @Author : 龙王赘婿_彪少
# @File : panet.py
# @Software: PyCharm

# Probe a range of generated website addresses and collect the ones that are reachable.

import http.client
import itertools
import string
import urllib.error
import urllib.request

class PANET:
    """Probe generated ``http://www.<name>.com`` addresses and keep the reachable ones.

    Attributes:
        goodurl: URLs that ``checkhtml`` opened successfully, in the order
            they were found.
    """

    # Browser identification string shared by both header sets.
    _USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36')

    # Seconds to wait on a single request before giving up.
    _TIMEOUT = 10

    # Failures that mean "this URL is unusable" rather than "the program is
    # broken". URLError/HTTPError and socket timeouts are OSError subclasses;
    # HTTPException covers malformed or truncated HTTP responses; ValueError
    # is what urllib raises for a URL it cannot parse.
    _FETCH_ERRORS = (OSError, http.client.HTTPException, ValueError)

    def __init__(self):
        self.goodurl = []

    def header_plus(self):
        """Return an extended header set: user agent, cookie and referer."""
        headers = {
            'user-agent': self._USER_AGENT,
            'cookie': 'Hm_lvt_cb7f29be3c304cd3bb0c65a4faa96c30=1626761071; Hm_lpvt_cb7f29be3c304cd3bb0c65a4faa96c30=1627519587',
            # The referer must be a well-formed URL; the stray spaces that
            # used to be embedded in it have been removed.
            'Referer': 'http://www.sky-robotics.cn/'
        }
        return headers

    def header(self):
        """Return the minimal header set, which only identifies the browser type."""
        headers = {
            'user-agent': self._USER_AGENT,
        }
        return headers

    def _build_request(self, url):
        """Wrap *url* in a GET request carrying the minimal headers."""
        return urllib.request.Request(url=url, headers=self.header(), method="GET")

    @staticmethod
    def _report_error(error):
        """Print the status code and/or reason of a failed request.

        Errors that carry neither attribute are printed as-is so that no
        failure goes unreported.
        """
        reported = False
        for attribute in ("code", "reason"):
            if hasattr(error, attribute):
                print(getattr(error, attribute))
                reported = True
        if not reported:
            print(error)

    def _candidate_urls(self):
        """Yield every candidate URL: letter, digit, digit, letter, letter."""
        letters = string.ascii_lowercase
        digits = string.digits
        # product() varies the right-most position fastest, which is the same
        # order as nesting one loop per character position.
        for first, digit1, digit2, second, third in itertools.product(
                letters, digits, digits, letters, letters):
            yield "http://www." + first + digit1 + digit2 + second + third + ".com"

    def getHtml(self, url):
        """Fetch *url* and return its body as text.

        The body is decoded as UTF-8; bytes that are not valid UTF-8 are
        replaced instead of raising.

        Args:
            url: Address to fetch.

        Returns:
            The decoded body, or an empty string when the request fails
            (the failure is printed).
        """
        html = ""
        try:
            request = self._build_request(url)
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(request, timeout=self._TIMEOUT) as response:
                html = response.read().decode("utf-8", errors="replace")
        except self._FETCH_ERRORS as error:
            self._report_error(error)
        return html

    def checkhtml(self, url):
        """Record *url* in ``goodurl`` and print it if it can be opened.

        A URL counts as reachable when ``urlopen`` returns without raising;
        the body is not read. Failures are printed and otherwise ignored so
        that a long scan keeps going.

        Args:
            url: Address to probe.
        """
        try:
            request = self._build_request(url)
            # Only reachability matters; close the response right away.
            with urllib.request.urlopen(request, timeout=self._TIMEOUT):
                pass
        except self._FETCH_ERRORS as error:
            self._report_error(error)
        else:
            self.goodurl.append(url)
            print(url)

    def showurl(self):
        """Print the list of reachable URLs collected so far."""
        print(self.goodurl)

    def process(self):
        """Probe every candidate URL in turn.

        There are 26 * 10 * 10 * 26 * 26 = 1,757,600 candidates and they are
        requested one after another, each waiting up to ``_TIMEOUT`` seconds.
        """
        for url in self._candidate_urls():
            self.checkhtml(url)

if __name__ == '__main__':
    print('begin')
    panet = PANET()
    try:
        # process() walks more than a million candidate URLs one by one, so
        # the run is likely to be interrupted (Ctrl-C) or to hit an unexpected
        # error long before it finishes.
        panet.process()
    finally:
        # Always print whatever was found, even on an early exit.
        panet.showurl()